*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*
* Replication Files to: P-hacking in clinical trials and how incentives shape the distribution of results across phases 
* Date: April 2020
* Authors: Jérôme Adda, Christian Decker, and Marco Ottaviani
*
*	- Input: 'data_counterfactual_analysis_sec.dta'
* 			 		 
*	- Output: Figure S3 , Table S5
*			 
* Topic: 'Placebo' Discontinuity Tests with Secondary Outcomes
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*

*------------------------*
* Section 0 -- Directory *
*------------------------*

clear all
clear matrix
cap log close
set more off

*-----*
* 0.1 *
*-----*

* Run the dofiles which defines all the globals for the folder's and sub-folders
* paths to both data and analysis.


if c(username) == "chrdec" {													
	do "C:/Users/chrdec/Dropbox/clinical trials/stata/PNAS revision/2_analysis/dofiles/0_globals.do"   			// 0_globals.do path

}

*-----*
* 0.2 * 
*-----*

* Set up the directory where data are stored
macro list
cd "${data}"
set matsize 11000

cap log off
log using "${run}${log}log_7c.txt", replace

*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~

*-------------------*
* Table of content: *
*-------------------*
* Sections: 
* 1. Load Data
* 2. Table S5 (p-values with free bandwidth)
* 3. Figure S3




*--------------------------*
* Section 1 -- Load Data *
*--------------------------*

use "${data}${final}data_counterfactual_analysis_sec.dta", clear

local threshold=-invnormal(.05/2)

keep if Ph2==1 | Ph3==1

gen SPLIT=top10rev

*------------------------------------------------------*
* Section 2 -- Table S5 (p-values with free bandwidth) *
*------------------------------------------------------*

putexcel set "${run}${table}tabS5.xls", replace

putexcel A1=(" ") B1=("Phase II") C1=("Phase III") 
rddensity z  if z_modifier==0 & Ph2==1,c(`threshold') 
putexcel A2=("All") B2=(round(e(pv_q),0.01)) B3=(e(N))
count if z_modifier==0 & Ph2==1
local aux=r(N)
putexcel B3=("(`aux')")
rddensity z  if z_modifier==0 & Ph3==1,c(`threshold') 
putexcel C2=(round(e(pv_q),0.01)) 
count if z_modifier==0 & Ph3==1
local aux=r(N)
putexcel C3=("(`aux')")

rddensity z  if z_modifier==0 & INDUSTRY==0 & Ph2==1,c(`threshold') 
putexcel A4=("Non-industry") B4=(round(e(pv_q),0.01)) 
count if z_modifier==0 & INDUSTRY==0 & Ph2==1
local aux=r(N)
putexcel B5=("(`aux')")
rddensity z  if z_modifier==0& INDUSTRY==0 & Ph3==1,c(`threshold')
putexcel C4=(round(e(pv_q),0.01))
count  if z_modifier==0& INDUSTRY==0 & Ph3==1
local aux=r(N)
putexcel C5=("(`aux')")

rddensity z  if z_modifier==0& INDUSTRY==1 & Ph2==1,c(`threshold') 
putexcel A6=("All industry") B6=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1 & Ph2==1
local aux=r(N)
putexcel B7=("(`aux')")
rddensity z  if z_modifier==0& INDUSTRY==1 & Ph3==1,c(`threshold')
putexcel C6=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1 & Ph3==1
local aux=r(N)
putexcel C7=("(`aux')")

rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph2==1,c(`threshold') 
putexcel A8=("Small industry") B8=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph2==1
local aux=r(N)
putexcel B9=("(`aux')")
rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph3==1,c(`threshold') 
putexcel C8=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph3==1
local aux=r(N)
putexcel C9=("(`aux')")

rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph2==1,c(`threshold') 
putexcel A10=("Top 10 industry") B10=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph2==1
local aux=r(N)
putexcel B11=("(`aux')")
rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph3==1,c(`threshold')
putexcel C10=(round(e(pv_q),0.01)) 
count if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph3==1
local aux=r(N)
putexcel C11=("(`aux')")

*------------------------*
* Section 3 -- Figure S3 *
*------------------------*

qui rddensity z  if z_modifier==0 & Ph2==1,c(`threshold') h(1) genvars(Phase2) plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw) 

qui rddensity z  if z_modifier==0 & Ph3==1,c(`threshold') h(1) genvars(Phase3)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==0 & Ph2==1,c(`threshold') h(1) genvars(Phase2NI)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==0 & Ph3==1,c(`threshold') h(1) genvars(Phase3NI)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph2==1,c(`threshold') h(1) genvars(Phase2SI)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==0 & Ph3==1,c(`threshold') h(1) genvars(Phase3SI)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph2==1,c(`threshold') h(1) genvars(Phase2big)  plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)

qui rddensity z  if z_modifier==0& INDUSTRY==1  & SPLIT==1 & Ph3==1,c(`threshold') h(1) genvars(Phase3big)   plot plot_range(0 4.5) plot_n(100 100) graph_options(nodraw)



local outcomeList "Phase2 Phase3 Phase2NI Phase3NI Phase2SI Phase3SI Phase2big Phase3big"
foreach outcome of local outcomeList {
replace `outcome'_grid=. if `outcome'_group==1&`outcome'_group[_n-1]==0
replace `outcome'_grid=. if `outcome'_grid>4
replace `outcome'_grid=. if `outcome'_grid<0.05
replace `outcome'_cil=0 if `outcome'_cil<0
replace `outcome'_cir=. if `outcome'_cir>0.75
}



twoway (connected Phase2_f Phase2_grid, msymbol(none) lcolor(ebblue) lwidth(thick) lpattern(dash) cmissing(n)) (connected Phase3_f Phase3_grid, msymbol(none) lcolor(gs6) lwidth(thick) lpattern(solid) cmissing(n)) (rarea Phase2_cil Phase2_cir Phase2_grid, fcolor(ebblue%20) lwidth(none) cmissing(n)) (rarea Phase3_cil Phase3_cir Phase3_grid if Phase3_grid<10, fcolor(gs6%25) lwidth(none) cmissing(n)), ytitle(Density) xtitle(z) title("{bf:A}", pos(11) size(huge)) subtitle("All sponsors",pos(12) size(large)) ylabel( , format(%03.1f)) xline(1.96, lwidth(medthick) lpattern(dash) lcolor(black)) text(0.8 2.325 "z=1.96") plotr(ls(none)) scheme(s1color) name(Cattaneo_All,replace) legend(off) nodraw
//
twoway (connected Phase2NI_f Phase2NI_grid, msymbol(none) lcolor(ebblue) lwidth(thick) lpattern(dash) cmissing(n)) (connected Phase3NI_f Phase3NI_grid, msymbol(none) lcolor(gs6) lwidth(thick) lpattern(solid) cmissing(n)) (rarea Phase2NI_cil Phase2NI_cir Phase2NI_grid, fcolor(ebblue%20) lwidth(none) cmissing(n)) (rarea Phase3NI_cil Phase3NI_cir Phase3NI_grid if Phase3NI_grid<10, fcolor(gs6%25) lwidth(none) cmissing(n)), ytitle(Density) xtitle(z) title("{bf:B}", pos(11) size(huge)) subtitle("Non-industry",pos(12) size(large)) ylabel( , format(%03.1f)) xline(1.96, lwidth(medthick) lpattern(dash) lcolor(black))  text(0.8 2.325 "z=1.96") plotr(ls(none)) scheme(s1color) name(Cattaneo_NonIndustry,replace) legend(off) nodraw
//
twoway (connected Phase2SI_f Phase2SI_grid, msymbol(none) lcolor(ebblue) lwidth(thick) lpattern(dash) cmissing(n)) (connected Phase3SI_f Phase3SI_grid, msymbol(none) lcolor(gs6) lwidth(thick) lpattern(solid) cmissing(n)) (rarea Phase2SI_cil Phase2SI_cir Phase2SI_grid, fcolor(ebblue%20) lwidth(none) cmissing(n)) (rarea Phase3SI_cil Phase3SI_cir Phase3SI_grid, fcolor(gs6%25) lwidth(none) cmissing(n)), ytitle(Density) xtitle(z) title("{bf:D}", pos(11) size(huge)) subtitle("Small industry",pos(12) size(large)) ylabel( , format(%03.1f)) xline(1.96, lwidth(medthick) lpattern(dash) lcolor(black)) legend(order(1 "Phase II" 2 "Phase III"))  text(0.8 2.325 "z=1.96") text(0.32 2.08 "{bf:*}", color(gs6)) plotr(ls(none)) scheme(s1color) name(Cattaneo_smallIndustry,replace) nodraw
//
twoway (connected Phase2big_f Phase2big_grid, msymbol(none) lcolor(ebblue) lwidth(thick) lpattern(dash) cmissing(n)) (connected Phase3big_f Phase3big_grid, msymbol(none) lcolor(gs6) lwidth(thick) lpattern(solid) cmissing(n)) (rarea Phase2big_cil Phase2big_cir Phase2big_grid, fcolor(ebblue%20) lwidth(none) cmissing(n)) (rarea Phase3big_cil Phase3big_cir Phase3big_grid, fcolor(gs6%25) lwidth(none) cmissing(n)), ytitle(Density) xtitle(z) title("{bf:C}", pos(11) size(huge)) subtitle("Top 10 industry",pos(12) size(large)) ylabel( , format(%03.1f)) xline(1.96, lwidth(medthick) lpattern(dash) lcolor(black)) text(0.8 2.325 "z=1.96") plotr(ls(none)) scheme(s1color) name(Cattaneo_top10,replace) legend(off) nodraw
//


grc1leg Cattaneo_All Cattaneo_NonIndustry Cattaneo_top10 Cattaneo_smallIndustry, imargin(0 0 0 0) scheme(s1color) legendfrom(Cattaneo_smallIndustry) name(figS1, replace) ycom ysize(16) xsize(20)
graph export "${run}${graph}figS3.png", replace width(1500)


log close 
cd "${run}${dofiles}"
